import pandas as pd
import warnings
# Install an 'ignore' filter so warnings are not shown.
warnings.filterwarnings('ignore')
# Load the crop recommendation dataset from the working directory and display it.
crp_rcmnd = pd.read_csv("Crop_recommendation.csv")
crp_rcmnd
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 0 | 90 | 42 | 43 | 20.879744 | 82.002744 | 6.502985 | 202.935536 | rice |
| 1 | 85 | 58 | 41 | 21.770462 | 80.319644 | 7.038096 | 226.655537 | rice |
| 2 | 60 | 55 | 44 | 23.004459 | 82.320763 | 7.840207 | 263.964248 | rice |
| 3 | 74 | 35 | 40 | 26.491096 | 80.158363 | 6.980401 | 242.864034 | rice |
| 4 | 78 | 42 | 42 | 20.130175 | 81.604873 | 7.628473 | 262.717340 | rice |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2195 | 107 | 34 | 32 | 26.774637 | 66.413269 | 6.780064 | 177.774507 | coffee |
| 2196 | 99 | 15 | 27 | 27.417112 | 56.636362 | 6.086922 | 127.924610 | coffee |
| 2197 | 118 | 33 | 30 | 24.131797 | 67.225123 | 6.362608 | 173.322839 | coffee |
| 2198 | 117 | 32 | 34 | 26.272418 | 52.127394 | 6.758793 | 127.175293 | coffee |
| 2199 | 104 | 18 | 30 | 23.603016 | 60.396475 | 6.779833 | 140.937041 | coffee |
2200 rows × 8 columns
# Data type of each column.
crp_rcmnd.dtypes
N int64 P int64 K int64 temperature float64 humidity float64 ph float64 rainfall float64 label object dtype: object
# Column labels of the dataset.
crp_rcmnd.columns
Index(['N', 'P', 'K', 'temperature', 'humidity', 'ph', 'rainfall', 'label'], dtype='object')
# Row index of the dataset.
crp_rcmnd.index
RangeIndex(start=0, stop=2200, step=1)
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
crp_rcmnd.describe()
| N | P | K | temperature | humidity | ph | rainfall | |
|---|---|---|---|---|---|---|---|
| count | 2200.000000 | 2200.000000 | 2200.000000 | 2200.000000 | 2200.000000 | 2200.000000 | 2200.000000 |
| mean | 50.551818 | 53.362727 | 48.149091 | 25.616244 | 71.481779 | 6.469480 | 103.463655 |
| std | 36.917334 | 32.985883 | 50.647931 | 5.063749 | 22.263812 | 0.773938 | 54.958389 |
| min | 0.000000 | 5.000000 | 5.000000 | 8.825675 | 14.258040 | 3.504752 | 20.211267 |
| 25% | 21.000000 | 28.000000 | 20.000000 | 22.769375 | 60.261953 | 5.971693 | 64.551686 |
| 50% | 37.000000 | 51.000000 | 32.000000 | 25.598693 | 80.473146 | 6.425045 | 94.867624 |
| 75% | 84.250000 | 68.000000 | 49.000000 | 28.561654 | 89.948771 | 6.923643 | 124.267508 |
| max | 140.000000 | 145.000000 | 205.000000 | 43.675493 | 99.981876 | 9.935091 | 298.560117 |
# Print the index range, per-column non-null counts and dtypes, and memory usage.
crp_rcmnd.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 2200 entries, 0 to 2199 Data columns (total 8 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 N 2200 non-null int64 1 P 2200 non-null int64 2 K 2200 non-null int64 3 temperature 2200 non-null float64 4 humidity 2200 non-null float64 5 ph 2200 non-null float64 6 rainfall 2200 non-null float64 7 label 2200 non-null object dtypes: float64(4), int64(3), object(1) memory usage: 137.6+ KB
# Mean of every numeric column. `numeric_only=True` skips the string `label`
# column, which pandas >= 2.0 refuses to average (it raises TypeError instead
# of silently dropping the column as older releases did).
crp_rcmnd.mean(numeric_only=True)
N 50.551818 P 53.362727 K 48.149091 temperature 25.616244 humidity 71.481779 ph 6.469480 rainfall 103.463655 dtype: float64
# First five rows of the dataset.
crp_rcmnd.iloc[:5]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 0 | 90 | 42 | 43 | 20.879744 | 82.002744 | 6.502985 | 202.935536 | rice |
| 1 | 85 | 58 | 41 | 21.770462 | 80.319644 | 7.038096 | 226.655537 | rice |
| 2 | 60 | 55 | 44 | 23.004459 | 82.320763 | 7.840207 | 263.964248 | rice |
| 3 | 74 | 35 | 40 | 26.491096 | 80.158363 | 6.980401 | 242.864034 | rice |
| 4 | 78 | 42 | 42 | 20.130175 | 81.604873 | 7.628473 | 262.717340 | rice |
# Last five rows of the dataset.
crp_rcmnd.iloc[-5:]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 2195 | 107 | 34 | 32 | 26.774637 | 66.413269 | 6.780064 | 177.774507 | coffee |
| 2196 | 99 | 15 | 27 | 27.417112 | 56.636362 | 6.086922 | 127.924610 | coffee |
| 2197 | 118 | 33 | 30 | 24.131797 | 67.225123 | 6.362608 | 173.322839 | coffee |
| 2198 | 117 | 32 | 34 | 26.272418 | 52.127394 | 6.758793 | 127.175293 | coffee |
| 2199 | 104 | 18 | 30 | 23.603016 | 60.396475 | 6.779833 | 140.937041 | coffee |
# Single row selected by index label 175.
crp_rcmnd.loc[175]
N 77 P 52 K 17 temperature 24.863749 humidity 65.742005 ph 5.7148 rainfall 75.822705 label maize Name: 175, dtype: object
# The crop label column on its own.
crp_rcmnd["label"]
0 rice
1 rice
2 rice
3 rice
4 rice
...
2195 coffee
2196 coffee
2197 coffee
2198 coffee
2199 coffee
Name: label, Length: 2200, dtype: object
# All rows whose crop label is "mango".
crp_rcmnd.loc[crp_rcmnd["label"] == "mango"]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 1100 | 2 | 40 | 27 | 29.737700 | 47.548852 | 5.954627 | 90.095869 | mango |
| 1101 | 39 | 24 | 31 | 33.556956 | 53.729798 | 4.757115 | 98.675276 | mango |
| 1102 | 21 | 26 | 27 | 27.003155 | 47.675254 | 5.699587 | 95.851183 | mango |
| 1103 | 25 | 22 | 25 | 33.561502 | 45.535566 | 5.977414 | 95.705259 | mango |
| 1104 | 0 | 21 | 32 | 35.898556 | 54.259642 | 6.430139 | 92.197217 | mango |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1195 | 19 | 38 | 26 | 31.484517 | 48.779263 | 4.525722 | 93.172220 | mango |
| 1196 | 21 | 21 | 30 | 27.698193 | 51.415932 | 5.403908 | 100.772070 | mango |
| 1197 | 22 | 18 | 33 | 30.412358 | 52.481006 | 6.621624 | 93.923759 | mango |
| 1198 | 31 | 20 | 30 | 32.177520 | 54.013527 | 6.207496 | 91.887661 | mango |
| 1199 | 18 | 26 | 31 | 32.611261 | 47.749165 | 5.418475 | 91.101908 | mango |
100 rows × 8 columns
# All rows whose crop label is "watermelon".
crp_rcmnd.loc[crp_rcmnd["label"] == "watermelon"]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 1300 | 119 | 25 | 51 | 26.473302 | 80.922544 | 6.283818 | 53.657426 | watermelon |
| 1301 | 119 | 19 | 55 | 25.187800 | 83.446217 | 6.818261 | 46.874209 | watermelon |
| 1302 | 105 | 30 | 50 | 25.299547 | 81.775276 | 6.376201 | 57.041471 | watermelon |
| 1303 | 114 | 8 | 50 | 24.746313 | 88.308663 | 6.581588 | 57.958261 | watermelon |
| 1304 | 93 | 22 | 52 | 26.587407 | 81.325632 | 6.932740 | 41.875400 | watermelon |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 1395 | 97 | 12 | 47 | 25.287846 | 89.636679 | 6.765095 | 58.286977 | watermelon |
| 1396 | 110 | 7 | 45 | 26.638386 | 84.695469 | 6.189214 | 48.324286 | watermelon |
| 1397 | 96 | 18 | 50 | 25.331045 | 84.305338 | 6.904242 | 41.532187 | watermelon |
| 1398 | 83 | 23 | 55 | 26.897502 | 83.892415 | 6.463271 | 43.971937 | watermelon |
| 1399 | 120 | 24 | 47 | 26.986037 | 89.413849 | 6.260839 | 58.548767 | watermelon |
100 rows × 8 columns
# Rows with a soil pH of 4.0 or lower.
crp_rcmnd.loc[crp_rcmnd["ph"] <= 4.0]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 500 | 3 | 49 | 18 | 27.910952 | 64.709306 | 3.692864 | 32.678919 | mothbeans |
| 521 | 22 | 49 | 22 | 28.234947 | 61.562052 | 3.711059 | 72.666664 | mothbeans |
| 526 | 8 | 60 | 18 | 31.216300 | 46.018682 | 3.808429 | 53.120528 | mothbeans |
| 529 | 36 | 43 | 24 | 27.094006 | 43.653054 | 3.510404 | 41.537495 | mothbeans |
| 535 | 11 | 45 | 19 | 28.700121 | 44.359648 | 3.828031 | 44.116221 | mothbeans |
| 537 | 17 | 57 | 20 | 28.506779 | 45.200945 | 3.793575 | 66.176146 | mothbeans |
| 557 | 4 | 46 | 15 | 31.012749 | 62.403925 | 3.504752 | 63.771924 | mothbeans |
| 561 | 35 | 51 | 17 | 28.799292 | 49.842134 | 3.558823 | 40.855347 | mothbeans |
| 582 | 19 | 51 | 25 | 26.804744 | 48.239914 | 3.525366 | 43.878020 | mothbeans |
| 599 | 16 | 51 | 21 | 31.019636 | 49.976752 | 3.532009 | 32.812965 | mothbeans |
# Frequency table: how often each nitrogen value occurs for each crop.
pd.crosstab(index=crp_rcmnd["N"], columns=crp_rcmnd["label"])
| label | apple | banana | blackgram | chickpea | coconut | coffee | cotton | grapes | jute | kidneybeans | ... | mango | mothbeans | mungbean | muskmelon | orange | papaya | pigeonpeas | pomegranate | rice | watermelon |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| N | |||||||||||||||||||||
| 0 | 3 | 0 | 0 | 0 | 4 | 0 | 0 | 1 | 0 | 2 | ... | 3 | 1 | 1 | 0 | 4 | 0 | 1 | 3 | 0 | 0 |
| 1 | 2 | 0 | 0 | 0 | 3 | 0 | 0 | 1 | 0 | 1 | ... | 3 | 0 | 2 | 0 | 3 | 0 | 3 | 1 | 0 | 0 |
| 2 | 6 | 0 | 0 | 0 | 2 | 0 | 0 | 1 | 0 | 1 | ... | 4 | 2 | 3 | 0 | 0 | 0 | 1 | 2 | 0 | 0 |
| 3 | 1 | 0 | 0 | 0 | 2 | 0 | 0 | 2 | 0 | 2 | ... | 2 | 3 | 0 | 0 | 0 | 0 | 3 | 3 | 0 | 0 |
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 1 | ... | 2 | 4 | 5 | 0 | 3 | 0 | 1 | 6 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 134 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 135 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 136 | 0 | 0 | 0 | 0 | 0 | 0 | 2 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 139 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
| 140 | 0 | 0 | 0 | 0 | 0 | 0 | 3 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
137 rows × 22 columns
# Per-crop mean of every feature.
crp_rcmnd.groupby("label").mean()
| N | P | K | temperature | humidity | ph | rainfall | |
|---|---|---|---|---|---|---|---|
| label | |||||||
| apple | 20.80 | 134.22 | 199.89 | 22.630942 | 92.333383 | 5.929663 | 112.654779 |
| banana | 100.23 | 82.01 | 50.05 | 27.376798 | 80.358123 | 5.983893 | 104.626980 |
| blackgram | 40.02 | 67.47 | 19.24 | 29.973340 | 65.118426 | 7.133952 | 67.884151 |
| chickpea | 40.09 | 67.79 | 79.92 | 18.872847 | 16.860439 | 7.336957 | 80.058977 |
| coconut | 21.98 | 16.93 | 30.59 | 27.409892 | 94.844272 | 5.976562 | 175.686646 |
| coffee | 101.20 | 28.74 | 29.94 | 25.540477 | 58.869846 | 6.790308 | 158.066295 |
| cotton | 117.77 | 46.24 | 19.56 | 23.988958 | 79.843474 | 6.912675 | 80.398043 |
| grapes | 23.18 | 132.53 | 200.11 | 23.849575 | 81.875228 | 6.025937 | 69.611829 |
| jute | 78.40 | 46.86 | 39.99 | 24.958376 | 79.639864 | 6.732778 | 174.792798 |
| kidneybeans | 20.75 | 67.54 | 20.05 | 20.115085 | 21.605357 | 5.749411 | 105.919778 |
| lentil | 18.77 | 68.36 | 19.41 | 24.509052 | 64.804785 | 6.927932 | 45.680454 |
| maize | 77.76 | 48.44 | 19.79 | 22.389204 | 65.092249 | 6.245190 | 84.766988 |
| mango | 20.07 | 27.18 | 29.92 | 31.208770 | 50.156573 | 5.766373 | 94.704515 |
| mothbeans | 21.44 | 48.01 | 20.23 | 28.194920 | 53.160418 | 6.831174 | 51.198487 |
| mungbean | 20.99 | 47.28 | 19.87 | 28.525775 | 85.499975 | 6.723957 | 48.403601 |
| muskmelon | 100.32 | 17.72 | 50.08 | 28.663066 | 92.342802 | 6.358805 | 24.689952 |
| orange | 19.58 | 16.55 | 10.01 | 22.765725 | 92.170209 | 7.016957 | 110.474969 |
| papaya | 49.88 | 59.05 | 50.04 | 33.723859 | 92.403388 | 6.741442 | 142.627839 |
| pigeonpeas | 20.73 | 67.73 | 20.29 | 27.741762 | 48.061633 | 5.794175 | 149.457564 |
| pomegranate | 18.87 | 18.75 | 40.21 | 21.837842 | 90.125504 | 6.429172 | 107.528442 |
| rice | 79.89 | 47.58 | 39.87 | 23.689332 | 82.272822 | 6.425471 | 236.181114 |
| watermelon | 99.42 | 17.00 | 50.22 | 25.591767 | 85.160375 | 6.495778 | 50.786219 |
# Boolean mask marking missing values cell by cell.
crp_rcmnd.isna()
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 0 | False | False | False | False | False | False | False | False |
| 1 | False | False | False | False | False | False | False | False |
| 2 | False | False | False | False | False | False | False | False |
| 3 | False | False | False | False | False | False | False | False |
| 4 | False | False | False | False | False | False | False | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2195 | False | False | False | False | False | False | False | False |
| 2196 | False | False | False | False | False | False | False | False |
| 2197 | False | False | False | False | False | False | False | False |
| 2198 | False | False | False | False | False | False | False | False |
| 2199 | False | False | False | False | False | False | False | False |
2200 rows × 8 columns
# True if at least one cell in the whole frame is missing.
crp_rcmnd.isna().to_numpy().any()
False
# Keep `rainfall` at this stage: the sampling, shape and correlation cells that
# follow still report it, and the column is removed further down, right after
# the correlation heatmap. Dropping it here as well would remove it before
# those cells run and leave the later drop with nothing to remove.
crp_rcmnd
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 0 | 90 | 42 | 43 | 20.879744 | 82.002744 | 6.502985 | 202.935536 | rice |
| 1 | 85 | 58 | 41 | 21.770462 | 80.319644 | 7.038096 | 226.655537 | rice |
| 2 | 60 | 55 | 44 | 23.004459 | 82.320763 | 7.840207 | 263.964248 | rice |
| 3 | 74 | 35 | 40 | 26.491096 | 80.158363 | 6.980401 | 242.864034 | rice |
| 4 | 78 | 42 | 42 | 20.130175 | 81.604873 | 7.628473 | 262.717340 | rice |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2195 | 107 | 34 | 32 | 26.774637 | 66.413269 | 6.780064 | 177.774507 | coffee |
| 2196 | 99 | 15 | 27 | 27.417112 | 56.636362 | 6.086922 | 127.924610 | coffee |
| 2197 | 118 | 33 | 30 | 24.131797 | 67.225123 | 6.362608 | 173.322839 | coffee |
| 2198 | 117 | 32 | 34 | 26.272418 | 52.127394 | 6.758793 | 127.175293 | coffee |
| 2199 | 104 | 18 | 30 | 23.603016 | 60.396475 | 6.779833 | 140.937041 | coffee |
2200 rows × 8 columns
# Shuffle all rows and show the first ten of the shuffled frame.
crp_rcmnd.sample(frac=1).iloc[:10]
| N | P | K | temperature | humidity | ph | rainfall | label | |
|---|---|---|---|---|---|---|---|---|
| 1453 | 93 | 22 | 48 | 29.125337 | 91.522911 | 6.776988 | 21.904404 | muskmelon |
| 2143 | 113 | 33 | 34 | 26.003740 | 62.144510 | 6.559817 | 153.477776 | coffee |
| 726 | 49 | 68 | 22 | 28.568406 | 61.532786 | 7.127064 | 63.497263 | blackgram |
| 236 | 57 | 58 | 77 | 18.726494 | 17.584064 | 7.978997 | 81.201765 | chickpea |
| 1457 | 100 | 14 | 49 | 29.488830 | 91.075742 | 6.365957 | 26.019094 | muskmelon |
| 1896 | 28 | 27 | 32 | 28.940997 | 93.001090 | 5.764615 | 191.772309 | coconut |
| 2181 | 101 | 31 | 26 | 26.708975 | 69.711841 | 6.861235 | 158.860889 | coffee |
| 1505 | 32 | 137 | 204 | 22.860066 | 93.128599 | 5.824152 | 117.729673 | apple |
| 733 | 53 | 67 | 17 | 31.776817 | 69.018529 | 7.296972 | 61.468929 | blackgram |
| 451 | 24 | 73 | 20 | 19.637362 | 32.315289 | 4.608695 | 176.413409 | pigeonpeas |
# (number of rows, number of columns).
crp_rcmnd.shape
(2200, 8)
# Number of rows flagged as duplicates of another row.
crp_rcmnd.duplicated().sum()
0
# Pairwise correlation of the numeric columns only. The string `label` column
# cannot be correlated and makes a bare `DataFrame.corr()` raise on
# pandas >= 2.0, so it is filtered out explicitly.
crp_rcmnd.select_dtypes(include="number").corr()
| N | P | K | temperature | humidity | ph | rainfall | |
|---|---|---|---|---|---|---|---|
| N | 1.000000 | -0.231460 | -0.140512 | 0.026504 | 0.190688 | 0.096683 | 0.059020 |
| P | -0.231460 | 1.000000 | 0.736232 | -0.127541 | -0.118734 | -0.138019 | -0.063839 |
| K | -0.140512 | 0.736232 | 1.000000 | -0.160387 | 0.190859 | -0.169503 | -0.053461 |
| temperature | 0.026504 | -0.127541 | -0.160387 | 1.000000 | 0.205320 | -0.017795 | -0.030084 |
| humidity | 0.190688 | -0.118734 | 0.190859 | 0.205320 | 1.000000 | -0.008483 | 0.094423 |
| ph | 0.096683 | -0.138019 | -0.169503 | -0.017795 | -0.008483 | 1.000000 | -0.109069 |
| rainfall | 0.059020 | -0.063839 | -0.053461 | -0.030084 | 0.094423 | -0.109069 | 1.000000 |
import seaborn as sns
import matplotlib as plt
import matplotlib.pyplot as plt
import numpy as np
# Correlation heatmap of the numeric features. The string `label` column is
# excluded first because `DataFrame.corr()` raises on non-numeric data in
# pandas >= 2.0.
corr = crp_rcmnd.select_dtypes(include="number").corr()
sns.heatmap(corr, annot=True, cbar=True, cmap= 'coolwarm')
<AxesSubplot:>
# Remove `rainfall` from the working frame. `errors="ignore"` makes the cell
# idempotent: it can be re-run, or run after the column has already been
# removed, without raising KeyError.
crp_rcmnd = crp_rcmnd.drop(columns="rainfall", errors="ignore")
crp_rcmnd
| N | P | K | temperature | humidity | ph | label | |
|---|---|---|---|---|---|---|---|
| 0 | 90 | 42 | 43 | 20.879744 | 82.002744 | 6.502985 | rice |
| 1 | 85 | 58 | 41 | 21.770462 | 80.319644 | 7.038096 | rice |
| 2 | 60 | 55 | 44 | 23.004459 | 82.320763 | 7.840207 | rice |
| 3 | 74 | 35 | 40 | 26.491096 | 80.158363 | 6.980401 | rice |
| 4 | 78 | 42 | 42 | 20.130175 | 81.604873 | 7.628473 | rice |
| ... | ... | ... | ... | ... | ... | ... | ... |
| 2195 | 107 | 34 | 32 | 26.774637 | 66.413269 | 6.780064 | coffee |
| 2196 | 99 | 15 | 27 | 27.417112 | 56.636362 | 6.086922 | coffee |
| 2197 | 118 | 33 | 30 | 24.131797 | 67.225123 | 6.362608 | coffee |
| 2198 | 117 | 32 | 34 | 26.272418 | 52.127394 | 6.758793 | coffee |
| 2199 | 104 | 18 | 30 | 23.603016 | 60.396475 | 6.779833 | coffee |
2200 rows × 7 columns
# Distinct crop labels, in order of first appearance.
crp_rcmnd.label.unique()
array(['rice', 'maize', 'chickpea', 'kidneybeans', 'pigeonpeas',
'mothbeans', 'mungbean', 'blackgram', 'lentil', 'pomegranate',
'banana', 'mango', 'grapes', 'watermelon', 'muskmelon', 'apple',
'orange', 'papaya', 'coconut', 'cotton', 'jute', 'coffee'],
dtype=object)
# Number of rows per crop label.
crp_rcmnd["label"].value_counts()
rice 100 maize 100 jute 100 cotton 100 coconut 100 papaya 100 orange 100 apple 100 muskmelon 100 watermelon 100 grapes 100 mango 100 banana 100 pomegranate 100 lentil 100 blackgram 100 mungbean 100 mothbeans 100 pigeonpeas 100 kidneybeans 100 chickpea 100 coffee 100 Name: label, dtype: int64
# Scatter plot of nitrogen values against crop label on a wide canvas.
crp_rcmnd.plot.scatter(x="label", y="N", figsize=(28, 5))
<AxesSubplot:xlabel='label', ylabel='N'>
# Histogram of the phosphorus column.
crp_rcmnd["P"].plot(kind="hist")
<AxesSubplot:ylabel='Frequency'>
# Rows for the "orange" crop, followed by how many there are.
orange = crp_rcmnd.loc[crp_rcmnd.label == "orange"]
orange.shape[0]
100
# First five "orange" rows.
orange.iloc[:5]
| N | P | K | temperature | humidity | ph | label | |
|---|---|---|---|---|---|---|---|
| 1600 | 22 | 30 | 12 | 15.781442 | 92.510777 | 6.354007 | orange |
| 1601 | 37 | 6 | 13 | 26.030973 | 91.508193 | 7.511755 | orange |
| 1602 | 27 | 13 | 6 | 13.360506 | 91.356082 | 7.335158 | orange |
| 1603 | 7 | 16 | 9 | 18.879577 | 92.043045 | 7.813917 | orange |
| 1604 | 20 | 7 | 9 | 29.477417 | 91.578029 | 7.129137 | orange |
# Mean of every feature per crop (one row per label).
crp_rcmnd_mean = pd.pivot_table(crp_rcmnd,index=['label'],aggfunc='mean')
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import random

# Pool of random hex colours; each bar trace below picks one at random.
number_of_colors = 16
col_arr = ["#" + ''.join(random.choice('0123456789ABCDEF') for _ in range(6))
           for _ in range(number_of_colors)]

# Crops ordered from the highest to the lowest mean phosphorus value.
phos_summary = crp_rcmnd_mean.sort_values(by='P', ascending=False)
fig = make_subplots(rows=1, cols=2)

# Ten highest and ten lowest crops, each re-sorted in ascending order.
# `head`/`tail` state the intent directly instead of positional slices.
most_p = phos_summary['P'].head(10).sort_values()
least_p = phos_summary['P'].tail(10).sort_values()
top_10 = {'y': most_p.index, 'x': most_p}
last_10 = {'y': least_p.index, 'x': least_p}

# Left panel: crops with the highest mean phosphorus.
fig.add_trace(
    go.Bar(top_10,
           marker_color=random.choice(col_arr),
           name='Most Phosphorus Required',
           orientation='h',
           text=top_10['x']
           ),
    row=1, col=1
)

# Right panel: crops with the lowest mean phosphorus.
fig.add_trace(
    go.Bar(last_10,
           marker_color=random.choice(col_arr),
           name='Least Phosphorus Required',
           orientation='h',
           text=last_10['x']
           ),
    row=1, col=2
)

# Print each bar's value inside the bar and hide the grid lines.
fig.update_traces(texttemplate='%{text}', textposition='inside')
fig.update_layout(title='Phosphorus', font_size=12)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)
fig.show()
# Bar chart comparing the mean N, P and K values of every crop.
# One trace per nutrient: (column in crp_rcmnd_mean, bar colour, legend entry).
# 'lightblue' is the valid CSS colour name; the spelling with a space is not.
fig = go.Figure()
for column, colour, legend in (
    ('N', 'blue', 'Nitrogen'),
    ('P', 'lightblue', 'Phosphorus'),
    ('K', 'green', 'Potassium'),
):
    fig.add_trace(go.Bar(x=crp_rcmnd_mean.index,
                         y=crp_rcmnd_mean[column],
                         marker_color=colour,
                         name=legend))
# Kept as the final expression so a notebook renders the returned figure.
fig.update_layout(title='Comparison between N, P, K')
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Model inputs (six soil/climate columns) and the crop label to predict.
target = crp_rcmnd['label']
labels = crp_rcmnd['label']
features = crp_rcmnd[['N', 'P','K','temperature', 'humidity', 'ph']]

# Parallel lists collecting each model's name and test accuracy.
model = []
acc = []

# Hold out 20% of the rows for testing, with a fixed seed.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    features, target, test_size=0.2, random_state=2
)

# Entropy-based decision tree limited to depth 11.
DecisionTree = DecisionTreeClassifier(
    max_depth=11, criterion="entropy", random_state=2
)
predicted_values = DecisionTree.fit(Xtrain, Ytrain).predict(Xtest)

x = metrics.accuracy_score(Ytest, predicted_values)
model.append('Decision Tree')
acc.append(x)

print("DecisionTrees's Accuracy is: ", x*100)
print(classification_report(Ytest,predicted_values))
DecisionTrees's Accuracy is: 95.9090909090909
precision recall f1-score support
apple 1.00 1.00 1.00 13
banana 1.00 1.00 1.00 17
blackgram 0.73 1.00 0.84 16
chickpea 1.00 1.00 1.00 21
coconut 1.00 1.00 1.00 21
coffee 1.00 1.00 1.00 22
cotton 1.00 1.00 1.00 20
grapes 1.00 1.00 1.00 18
jute 0.81 0.89 0.85 28
kidneybeans 1.00 1.00 1.00 14
lentil 1.00 0.78 0.88 23
maize 1.00 1.00 1.00 21
mango 1.00 1.00 1.00 26
mothbeans 0.94 0.84 0.89 19
mungbean 1.00 1.00 1.00 24
muskmelon 1.00 1.00 1.00 23
orange 1.00 1.00 1.00 29
papaya 1.00 0.95 0.97 19
pigeonpeas 0.89 0.94 0.92 18
pomegranate 1.00 1.00 1.00 17
rice 0.79 0.69 0.73 16
watermelon 1.00 1.00 1.00 15
accuracy 0.96 440
macro avg 0.96 0.96 0.96 440
weighted avg 0.96 0.96 0.96 440
from sklearn.model_selection import cross_val_score
# 5-fold cross-validation scores of the decision tree on the full dataset.
score = cross_val_score(estimator=DecisionTree, X=features, y=target, cv=5)
score
array([0.95227273, 0.95454545, 0.95681818, 0.94772727, 0.95227273])
import pickle
# Persist the fitted decision tree classifier with pickle.
DT_pkl_filename = 'DecisionTree.pkl'
# The context manager closes the file even if pickling raises.
with open(DT_pkl_filename, 'wb') as DT_Model_pkl:
    pickle.dump(DecisionTree, DT_Model_pkl)
from sklearn.metrics import confusion_matrix
# Confusion matrix of the current test-set predictions, drawn as an annotated
# heatmap whose title carries the accuracy score.
cm = confusion_matrix(Ytest, predicted_values)
all_sample_title = 'Confusion Matrix - score:' + str(accuracy_score(Ytest, predicted_values))
plt.figure(figsize=(15, 15))
ax = sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
ax.set_title(all_sample_title, size=15)
plt.show()
# One hand-written sample, classified with the decision tree.
data = pd.DataFrame(
    [[83, 45, 60, 28, 70.3, 7.0]],
    columns=['N', 'P','K','temperature', 'humidity', 'ph'],
)
prediction = DecisionTree.predict(data)
print(prediction)
['jute']
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# The SVC kernel works on distances between samples, and the features live on
# very different scales (K reaches 205 while ph stays below 10), so the
# unscaled model barely learns. Standardising inside a pipeline fixes that and
# keeps the scaler fitted on training data only, also under cross-validation.
SVM = make_pipeline(StandardScaler(), SVC(gamma='auto'))
SVM.fit(Xtrain,Ytrain)
predicted_values = SVM.predict(Xtest)
# Record the test accuracy next to the other models.
x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('SVM')
print("SVM's Accuracy is: ", x)
print(classification_report(Ytest,predicted_values))
SVM's Accuracy is: 0.31363636363636366
precision recall f1-score support
apple 1.00 0.62 0.76 13
banana 1.00 0.47 0.64 17
blackgram 0.67 0.38 0.48 16
chickpea 1.00 0.24 0.38 21
coconut 1.00 0.33 0.50 21
coffee 1.00 0.23 0.37 22
cotton 1.00 0.50 0.67 20
grapes 1.00 0.22 0.36 18
jute 1.00 0.25 0.40 28
kidneybeans 0.05 1.00 0.09 14
lentil 0.75 0.13 0.22 23
maize 1.00 0.10 0.17 21
mango 1.00 0.15 0.27 26
mothbeans 1.00 0.21 0.35 19
mungbean 1.00 0.21 0.34 24
muskmelon 0.92 0.52 0.67 23
orange 1.00 0.10 0.19 29
papaya 1.00 0.37 0.54 19
pigeonpeas 0.33 0.11 0.17 18
pomegranate 0.91 0.59 0.71 17
rice 0.67 0.50 0.57 16
watermelon 0.80 0.27 0.40 15
accuracy 0.31 440
macro avg 0.87 0.34 0.42 440
weighted avg 0.89 0.31 0.41 440
# 5-fold cross-validation scores of the SVM on the full dataset.
score = cross_val_score(estimator=SVM, X=features, y=target, cv=5)
score
array([0.48863636, 0.44772727, 0.44090909, 0.475 , 0.475 ])
# One hand-written sample for the SVM section.
data = pd.DataFrame([[83, 45, 60, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
# Query the SVM fitted above; calling the decision tree here would only
# repeat the earlier decision-tree prediction.
prediction = SVM.predict(data)
print(prediction)
['jute']
from sklearn.ensemble import RandomForestClassifier
# Random forest of 30 trees, each limited to depth 14, with a fixed seed.
RF = RandomForestClassifier(random_state=0, max_depth=14, n_estimators=30)
predicted_values = RF.fit(Xtrain, Ytrain).predict(Xtest)

# Record the test accuracy next to the other models.
x = metrics.accuracy_score(Ytest, predicted_values)
model.append('RF')
acc.append(x)

print("RF's Accuracy is: ", x)
print(classification_report(Ytest,predicted_values))
RF's Accuracy is: 0.975
precision recall f1-score support
apple 1.00 1.00 1.00 13
banana 1.00 1.00 1.00 17
blackgram 0.88 0.88 0.88 16
chickpea 1.00 1.00 1.00 21
coconut 1.00 1.00 1.00 21
coffee 1.00 1.00 1.00 22
cotton 1.00 1.00 1.00 20
grapes 1.00 1.00 1.00 18
jute 0.96 0.89 0.93 28
kidneybeans 1.00 1.00 1.00 14
lentil 0.91 0.91 0.91 23
maize 1.00 1.00 1.00 21
mango 1.00 1.00 1.00 26
mothbeans 0.94 0.89 0.92 19
mungbean 1.00 1.00 1.00 24
muskmelon 1.00 1.00 1.00 23
orange 1.00 1.00 1.00 29
papaya 1.00 1.00 1.00 19
pigeonpeas 0.89 0.94 0.92 18
pomegranate 1.00 1.00 1.00 17
rice 0.83 0.94 0.88 16
watermelon 1.00 1.00 1.00 15
accuracy 0.97 440
macro avg 0.97 0.98 0.97 440
weighted avg 0.98 0.97 0.98 440
# 5-fold cross-validation scores of the random forest on the full dataset.
score = cross_val_score(estimator=RF, X=features, y=target, cv=5)
score
array([0.97272727, 0.96136364, 0.97045455, 0.96136364, 0.97045455])
# Persist the fitted random forest classifier with pickle.
RF_pkl_filename = 'RandomForest.pkl'
# The context manager closes the file even if pickling raises.
with open(RF_pkl_filename, 'wb') as RF_Model_pkl:
    pickle.dump(RF, RF_Model_pkl)
from sklearn.metrics import confusion_matrix
# Confusion matrix of the current test-set predictions, drawn as an annotated
# heatmap whose title carries the accuracy score.
cm = confusion_matrix(Ytest, predicted_values)
all_sample_title = 'Confusion Matrix - score:' + str(accuracy_score(Ytest, predicted_values))
plt.figure(figsize=(15, 15))
ax = sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
ax.set_title(all_sample_title, size=15)
plt.show()
# One hand-written sample, classified with the random forest.
data = pd.DataFrame(
    [[83, 45, 60, 28, 70.3, 7.0]],
    columns=['N', 'P','K','temperature', 'humidity', 'ph'],
)
prediction = RF.predict(data)
print(prediction)
['jute']
from sklearn.neighbors import KNeighborsClassifier
# 15-nearest-neighbour classifier using Manhattan distance and a KD-tree index.
knn = KNeighborsClassifier(metric='manhattan', algorithm='kd_tree', n_neighbors=15)
predicted_values = knn.fit(Xtrain, Ytrain).predict(Xtest)

# Record the test accuracy next to the other models.
x = metrics.accuracy_score(Ytest, predicted_values)
model.append('KNN')
acc.append(x)

print("KNN's Accuracy is: ", x)
print(classification_report(Ytest,predicted_values))
KNN's Accuracy is: 0.9318181818181818
precision recall f1-score support
apple 1.00 1.00 1.00 13
banana 1.00 1.00 1.00 17
blackgram 0.64 0.88 0.74 16
chickpea 1.00 1.00 1.00 21
coconut 1.00 1.00 1.00 21
coffee 1.00 1.00 1.00 22
cotton 0.95 1.00 0.98 20
grapes 1.00 1.00 1.00 18
jute 1.00 0.54 0.70 28
kidneybeans 1.00 1.00 1.00 14
lentil 0.72 0.78 0.75 23
maize 1.00 0.95 0.98 21
mango 1.00 1.00 1.00 26
mothbeans 0.94 0.89 0.92 19
mungbean 1.00 1.00 1.00 24
muskmelon 1.00 1.00 1.00 23
orange 1.00 1.00 1.00 29
papaya 1.00 1.00 1.00 19
pigeonpeas 1.00 0.61 0.76 18
pomegranate 1.00 1.00 1.00 17
rice 0.55 1.00 0.71 16
watermelon 1.00 1.00 1.00 15
accuracy 0.93 440
macro avg 0.95 0.94 0.93 440
weighted avg 0.95 0.93 0.93 440
# 5-fold cross-validation scores of the KNN classifier on the full dataset.
score = cross_val_score(estimator=knn, X=features, y=target, cv=5)
score
array([0.92727273, 0.95454545, 0.93863636, 0.91590909, 0.91590909])
# Persist the fitted KNN classifier with pickle.
knn_pkl_filename = 'knn.pkl'
# The context manager closes the file even if pickling raises.
with open(knn_pkl_filename, 'wb') as knn_Model_pkl:
    pickle.dump(knn, knn_Model_pkl)
# Confusion matrix of the current test-set predictions, drawn as an annotated
# heatmap whose title carries the accuracy score.
cm = confusion_matrix(Ytest, predicted_values)
all_sample_title = 'Confusion Matrix - score:' + str(accuracy_score(Ytest, predicted_values))
plt.figure(figsize=(15, 15))
ax = sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
ax.set_title(all_sample_title, size=15)
plt.show()
# One hand-written sample, classified with the KNN model.
data = pd.DataFrame(
    [[83, 45, 60, 28, 70.3, 7.0]],
    columns=['N', 'P','K','temperature', 'humidity', 'ph'],
)
prediction = knn.predict(data)
print(prediction)
['jute']
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import LabelEncoder
# The XGBoost model is trained on integer-encoded labels. Fit the encoder once,
# on the full label column, and reuse that single mapping everywhere.
# Re-fitting it separately on the train and test splits only yields matching
# codes when every split happens to contain every class.
label_encoder = LabelEncoder()
label_encoder.fit(target)
Ytrain = label_encoder.transform(Ytrain)
Ytest = label_encoder.transform(Ytest)
target = label_encoder.transform(target)
XB = xgb.XGBClassifier()
# Each hyper-parameter lists a single value, so the grid holds exactly one
# candidate and the search amounts to a 5-fold evaluation plus a final refit.
param_grid = {
    'learning_rate': [0.055],
    'max_depth': [3],
    'n_estimators': [150],
    'subsample': [ 1.0],
    'colsample_bytree': [1.0],
    'reg_alpha': [0.1],
    'reg_lambda': [0.1],
    'min_child_weight': [1]
}
grid_search = GridSearchCV(estimator=XB, param_grid=param_grid, cv=5)
grid_search.fit(Xtrain,Ytrain)
best_model = grid_search.best_estimator_
predicted_values = best_model.predict(Xtest)
# Record the test accuracy next to the other models.
x = metrics.accuracy_score(Ytest, predicted_values)
acc.append(x)
model.append('XGBoost')
print("XGBoost's Accuracy is: ", x)
XGBoost's Accuracy is: 0.9704545454545455
# 5-fold cross-validation scores of the tuned XGBoost model on the full dataset.
score = cross_val_score(estimator=best_model, X=features, y=target, cv=5)
score
array([0.96818182, 0.96363636, 0.96363636, 0.96363636, 0.96590909])
# Confusion matrix of the current test-set predictions, drawn as an annotated
# heatmap whose title carries the accuracy score.
cm = confusion_matrix(Ytest, predicted_values)
all_sample_title = 'Confusion Matrix - score:' + str(accuracy_score(Ytest, predicted_values))
plt.figure(figsize=(15, 15))
ax = sns.heatmap(cm, annot=True, fmt=".0f", linewidths=.5, square=True, cmap='Blues')
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
ax.set_title(all_sample_title, size=15)
plt.show()
from sklearn.preprocessing import LabelEncoder
# One hand-written sample, classified with the tuned XGBoost model.
data = pd.DataFrame([[85,58,41,21.770462,80.319644,7.038096]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = best_model.predict(data)
# The model outputs encoded class ids; map them back to crop names so the
# printed result is readable like the other models' predictions.
print(label_encoder.inverse_transform(prediction))
[20]
# A second hand-written sample, classified with the tuned XGBoost model.
data = pd.DataFrame([[83, 55, 65, 28, 70.3, 7.0]], columns=['N', 'P','K','temperature', 'humidity', 'ph'])
prediction = best_model.predict(data)
# The model outputs encoded class ids; map them back to crop names so the
# printed result is readable like the other models' predictions.
print(label_encoder.inverse_transform(prediction))
[5]